Epidemic curve
dataCurve <- data.frame(
Incident = c(rep("Severe Covid-19 case", 464), rep("Severe adverse reaction", 76)),
Date = rep(seq.POSIXt(as_datetime("2020-06-01 00:00:00", tz = "CET"),
as_datetime("2023-02-01 00:00:00", tz = "CET"), by = "month"),
c(19, 11, 11, 9, 13, 21, 58, 49, 73, 83, 33, 19, 14 ,13, 16, 11,
3, 3, 11, 26, 8, 2, 4, 3, 8, 1, 4, 2, 4, 4, 2, 1, 1))
)
dataDoses <- data.frame(
Date = seq.Date(as.Date("2021-01-01"), as.Date("2023-01-01"), by = "month"),
Ndoses = c(4573, 5343, 5474, 27699, 73329, 137575, 109853, 62694, 29315, 9604,
10456, 95054, 81274, 17859, 3998, 2385, 2384, 1772, 1931, 1471,
2326, 1872, 7005, 9746, 4759)
)
monthly_breaks <- seq.POSIXt(
from = as_datetime("2020-01-01 00:00:00", tz = "CET"),
to = as_datetime("2023-04-01 00:00:00", tz = "CET"),
by = "month"
)
ymax <- 87.5
coeff <- (140000 / ymax)
dataDoses$Ndoses <- dataDoses$Ndoses / coeff
# Initialize the plot with a dataset (DATA_ex1_1)
ggplot(dataCurve) +
# Add a histogram with dodged bars, colored by the "Incident" variable
geom_histogram(
mapping = aes(x = Date, fill = Incident), # Map 'cas_date' to x-axis, and 'Incident' to fill
breaks = monthly_breaks, # Use specified monthly breaks
closed = "left", # Define intervals as left-closed
color = "white", # Set white border for bars
position = "dodge" # Dodge bars side by side
) +
# Add horizontal grid lines with white color
geom_hline(yintercept = 1:ymax, color = "white") +
# Add vertical reference lines for specific dates
geom_vline(
xintercept = as_datetime(c("2021-01-01 00:00:00", "2022-01-01 00:00:00"), tz = "CET"), # Define dates as datetime objects
color = "lavenderblush4", # Set line color
size = 1 # Set line thickness
) +
# Annotate rectangular periods of interest
annotate(
"rect", # Specify rectangle annotation
xmin = as_datetime("2020-04-01 00:00:00", tz = "CET"), # Start of the rectangle
xmax = as_datetime("2020-06-01 00:00:00", tz = "CET"), # End of the rectangle
ymin = 0, # Rectangle starts at y = 0
ymax = ymax, # Rectangle ends at y = ymax
fill = "gray", # Set fill color to gray
alpha = 0.25 # Set transparency
) +
# Repeat annotation for additional periods
annotate("rect", xmin = as_datetime("2020-11-01 00:00:00", tz = "CET"),
xmax = as_datetime("2021-01-01 00:00:00", tz = "CET"), ymin = 0, ymax = ymax, fill = "gray", alpha = 0.25) +
annotate("rect", xmin = as_datetime("2021-04-01 00:00:00", tz = "CET"),
xmax = as_datetime("2021-06-01 00:00:00", tz = "CET"), ymin = 0, ymax = ymax, fill = "gray", alpha = 0.25) +
# Add a line plot for data in DATA_ex1_2
geom_line(
data = dataDoses,
aes(
x = as_datetime(paste(Date, "00:00:00"), tz = "CET"), # Convert Date to datetime
y = Ndoses, # Use Ndoses for y-axis
colour = "Number of vaccine doses" # Set color legend
),
linetype = 5 # Set line type
) +
# Add points on the line plot
geom_point(
data = dataDoses,
aes(x = as_datetime(paste(Date, "00:00:00"), tz = "CET"), y = Ndoses), # Map Date and Ndoses
colour = "darkslateblue", # Set color
shape = 16, # Use solid circles
size = 2 # Set point size
) +
# Format x-axis as datetime
scale_x_datetime(
expand = c(0, 0), # Remove padding
date_breaks = "month", # Major breaks every month
date_minor_breaks = "month", # Minor breaks also monthly
date_labels = "%m/%y" # Format as MM/YY
) +
# Customize color scale for doses
scale_colour_manual("Doses", values = "darkslateblue") +
# Use classic theme for the plot
theme_classic() +
# Modify various theme elements
theme(
panel.grid.minor.y = element_line(colour = "lightgray", size = 1), # Minor grid lines
panel.grid.major.y = element_line(colour = "lavenderblush4", size = 1), # Major grid lines
axis.text.x = element_text(angle = 90), # Rotate x-axis labels
axis.text.y.right = element_text(color = "darkslateblue"), # Color right y-axis text
axis.line.y.right = element_line(color = "darkslateblue"), # Color right y-axis line
axis.title.y.right = element_text(color = "darkslateblue"), # Color right y-axis title
legend.position = "bottom"
) +
# Format y-axis with a secondary axis
scale_y_continuous(
expand = c(0, 0), # Remove padding
limits = c(0, ymax), # Set y-axis limits
breaks = seq(0, 90, 10), # Major breaks every 10
sec.axis = sec_axis(
trans = ~ . * coeff, # Transform secondary axis
name = "Number of doses dispensed", # Secondary axis title
breaks = seq(0, 90, 10)*coeff, # Secondary axis breaks
labels = formatC( # Format secondary axis labels
seq(0, 90, 10)*coeff,
format = "f",
big.mark = " ",
digits = 0
)
)
) +
# Add titles and axis labels
labs(title = "", x = " ", y = "Number of incidents") +
# Add text annotations for specific periods
geom_text(mapping = aes_q(
x = as_datetime("2020-07-01 00:00:00", tz = "CET"), y = 82, label = "Period n°1"
)) +
geom_text(mapping = aes_q(
x = as_datetime("2021-07-01 00:00:00", tz = "CET"), y = 82, label = "Period n°2"
)) +
geom_text(mapping = aes_q(
x = as_datetime("2022-08-01 00:00:00", tz = "CET"), y = 82, label = "Period n°3"
))

# Create a data frame for histogram visualization
dataCurve <- data.frame(
# "Sex" column with repeated values for "Male" and "Female"
Sex = c(rep("Male", 323), rep("Female", 349)),
# "Date" column with monthly timestamps repeated according to specified counts
Date = c(
# Male dates: repeated monthly sequence with specific counts for each month
rep(seq.POSIXt(
as_datetime("2024-01-01 00:00:00", tz = "CET"),
as_datetime("2025-01-01 00:00:00", tz = "CET"),
by = "month"
), c(15, 21, 27, 26, 33, 38, 30, 22, 28, 17, 20, 19, 27)),
# Female dates: repeated monthly sequence with specific counts for each month
rep(seq.POSIXt(
as_datetime("2024-01-01 00:00:00", tz = "CET"),
as_datetime("2025-01-01 00:00:00", tz = "CET"),
by = "month"
), c(34, 41, 56, 32, 29, 33, 37, 26, 21, 11, 9, 12, 8))
)
)
# Define monthly breaks for the histogram
monthly_breaks <- seq.POSIXt(
from = as_datetime("2024-01-01 00:00:00", tz = "CET"), # Start of the range
to = as_datetime("2025-01-01 00:00:00", tz = "CET"), # End of the range
by = "month" # Break intervals (monthly)
)
# Create the histogram using ggplot2
ggplot(dataCurve) +
# Add a histogram layer
geom_histogram(
mapping = aes(x = Date), # Map the "Date" column to the x-axis
breaks = monthly_breaks, # Use predefined monthly breaks for bins
closed = "left", # Bins include the left boundary
color = "white", # White borders around bars
position = "dodge", # Position bars side by side
fill = "#0f766e" # Fill color for bars
) +
# Add horizontal lines at intervals of 5
geom_hline(
yintercept = seq(5, ymax, by = 5), # Sequence of horizontal line positions
color = "white" # Line color
) +
# Add a horizontal line at y=0
geom_hline(
aes(yintercept = 0)
) +
# Customize x-axis labels and ticks
scale_x_datetime(
expand = c(0, 0), # No expansion at the axis limits
date_breaks = "month", # Major breaks at each month
date_minor_breaks = "month", # Minor breaks at each month
date_labels = "%m/%y" # Format labels as "month/year"
) +
# Customize y-axis labels and ticks
scale_y_continuous(
expand = c(0, 0), # No expansion at the axis limits
limits = c(0, 60), # Set y-axis limits between 0 and 60
breaks = seq(0, 60, 10) # Major breaks at intervals of 10
) +
# Add plot titles and axis labels
labs(
title = "", # Main title (empty)
caption = "", # Caption (empty)
x = " ", # X-axis label (empty space)
y = "Number of cases" # Y-axis label
) +
# Create separate panels for each sex
facet_grid(
rows = vars(Sex) # Facet by "Sex" variable (one row for each value)
) +
# Apply a classic theme
theme_classic() +
# Customize the appearance of grid lines and axis text
theme(
panel.grid.minor.y = element_line(
colour = "lightgray", # Minor grid line color
size = 1 # Minor grid line size
),
panel.grid.major.y = element_line(
colour = "lavenderblush4", # Major grid line color
size = 1 # Major grid line size
),
axis.text.x = element_text(
angle = 0, # No rotation for x-axis text
vjust = 0.5, # Vertical adjustment for x-axis text
hjust = -0.1 # Horizontal adjustment for x-axis text
)
)

dataCurve <- data.frame(
Sex = c(rep("Male", 323), rep("Female", 349)),
Date = c(rep(seq.POSIXt(as_datetime("2024-01-01 00:00:00", tz = "CET"),
as_datetime("2025-01-01 00:00:00", tz = "CET"), by = "month"),
c(15, 21, 27, 26, 33, 38, 30, 22, 28, 17, 20, 19, 27)),
rep(seq.POSIXt(as_datetime("2024-01-01 00:00:00", tz = "CET"),
as_datetime("2025-01-01 00:00:00", tz = "CET"), by = "month"),
c(34, 41, 56, 32, 29, 33, 37, 26, 21, 11, 9, 12, 8)))
)
monthly_breaks <- seq.POSIXt(
from = as_datetime("2024-01-01 00:00:00", tz = "CET"),
to = as_datetime("2025-01-01 00:00:00", tz = "CET"),
by = "month"
)
ggplot(
dataCurve
) + # Histogramme de base
geom_bar(
mapping = aes(
x = Date,
fill = Sex
),
breaks = monthly_breaks,
closed = "left",
color = "white",
position = "stack"
) + # Lignes horizontales
geom_hline(
yintercept = seq(5, ymax, by = 5),
color = "white"
) +
geom_hline(
aes(yintercept = 0)
) +# Gestion des labels & ticks de l'axe x
scale_x_datetime(
expand = c(0,0),
date_breaks = "month",
date_minor_breaks = "month",
date_labels = "%m/%y"
) + # Separation de l'axe y
scale_y_continuous(
expand = c(0,0),
limits = c(0, 90),
breaks = seq(0, 90, 10)
) + # Titres
labs(
title = "",
caption = "",
x = " ",
y = "Number of cases"
) +
theme_classic() +# Suppression grille horizontale
theme(
panel.grid.minor.y = element_line(
colour = "lightgray",
size = 1
),
panel.grid.major.y = element_line(
colour = "lavenderblush4",
size = 1
)
)

Care trajectories
Sankey
createSankeyData <- function(data, states, timesColumns) {
# Function to prepare data for a Sankey diagram
# Arguments:
# data: A dataframe containing sequential data
# states: A vector of unique states
# timesColumns: A vector of column names representing time steps
data <- as.data.frame(data) # Ensure the input is a dataframe
n_states <- length(states) # Number of unique states
n_times <- length(timesColumns) # Number of time columns (steps)
# Convert time columns into factors with specified levels (states)
for (i in 1:n_times) {
data[, timesColumns[i]] <- factor(data[, timesColumns[i]], levels = states)
}
# Define colors for the nodes (states) and links between states
nodesCols <- c("#AAC0AF", "#B28B84", "#1C4073", "#0f766e", "#653239", "#472C1B", "#5C2751")[1:n_states]
linksCols <- c("#D0DCD3", "#D0B8B4", "#285CA4", "#17B5A7", "#964A54", "#76492D", "#8F3D7E")[1:n_states]
vals <- c() # Initialize a vector to store transition counts
# Calculate transitions between consecutive time steps for each state
for (i in 2:n_times) {
for (j in 1:n_states) {
# Count occurrences of transitions from state j at time (i-1) to all states at time i
vals <- c(vals, table(data[, timesColumns[i]][data[, timesColumns[i - 1]] == states[j]]))
}
}
# Prepare Sankey diagram data structure
dataSankeyTem <- list(
Nodes = data.frame(
X = 1:(n_states * n_times), # Node indices (unique for each state and time step)
label = rep(states, n_times), # Node labels (repeated for each time step)
color = rep(nodesCols, n_times) # Node colors
),
Links = data.frame(
source = c(rep(1:(n_states * (n_times - 1)), each = n_states)) - 1, # Source nodes for transitions
target = as.vector(sapply(split((n_states + 1):(n_states * n_times),
rep(1:(n_times - 1), each = n_states)),
function(x) {rep(x, n_states)})) - 1, # Target nodes for transitions
value = vals, # Transition counts
color = rep(rep(linksCols, each = n_states), n_times - 1) # Colors for links
)
)
}
# Example usage: Create Sankey data for three time steps with predefined states
sankeyData <- createSankeyData(
data.frame(
T1 = sample(
c("Initial treatment", "Substitution", "No treatment"),
size = 1000, prob = c(0.95, 0.025, 0.025), replace = TRUE),
T2 = sample(
c("Initial treatment", "Substitution", "No treatment"),
size = 1000, prob = c(0.75, 0.125, 0.125), replace = TRUE),
T3 = sample(
c("Initial treatment", "Substitution", "No treatment"),
size = 1000, prob = c(0.5, 0.25, 0.25), replace = TRUE)),
c("Initial treatment", "Substitution", "No treatment"), # Define states
c("T1", "T2", "T3") # Define time columns
)
plotSankey <- function(Nodes, Links, add.prop = FALSE) {
# Function to create a Sankey diagram using plotly.
# Args:
# Nodes: Data frame containing node details (labels, colors, etc.).
# Links: Data frame containing link details (source, target, values, colors).
# add.prop: Logical. If TRUE, adds percentage proportions to node labels.
if (add.prop) {
# Check if all sources in Links have an equal number of occurrences
if (all(table(Links$source) == table(Links$source)[1])) {
nrow_per_times <- table(Links$source)[1] ^ 2 # Determine rows per iteration
n_times <- nrow(Links) / nrow_per_times # Calculate number of iterations
for (i in 1:n_times) {
# Extract subset of Links for current iteration
tab <- Links[((i - 1) * nrow_per_times + 1):((i - 1) * nrow_per_times + nrow_per_times), ]
# Calculate proportions for targets
vals <- as.numeric(by(tab$value, tab$target, sum)) # Sum values by target
Nodes[Nodes$X %in% (unique(tab$target) + 1), "label"] <- paste0(
Nodes[Nodes$X %in% (unique(tab$target) + 1), "label"],
" (",
formatC(vals / sum(vals) * 100, digits = 1, format = 'f'), # Format as percentage
"%)"
)
if (i == 1) {
# Calculate proportions for sources during the first iteration
vals <- as.numeric(by(tab$value, tab$source, sum)) # Sum values by source
Nodes[Nodes$X %in% (unique(tab$source) + 1), "label"] <- paste0(
Nodes[Nodes$X %in% (unique(tab$source) + 1), "label"],
" (",
formatC(vals / sum(vals) * 100, digits = 1, format = 'f'), # Format as percentage
"%)"
)
}
}
} else {
warning(
"Links arg does not have equal number of each source and function cannot automatically calculate proportions."
)
}
}
# Convert colors in Links to RGBA format with transparency
Links$color <- apply(grDevices::col2rgb(Links$color), 2, function(x) {
paste0("rgba(", x[1], ",", x[2], ",", x[3], ",0.4)") # Add 40% transparency
})
# Create the Sankey diagram using plotly
fig <- plot_ly(
type = "sankey", # Specify Sankey diagram
orientation = "h", # Horizontal orientation
alpha_stroke = 0.2, # Node border transparency
node = list(
label = Nodes$label, # Node labels
color = Nodes$color, # Node colors
pad = 15, # Padding between nodes
thickness = 20, # Node thickness
line = list(color = "black", width = 0.5) # Node border style
),
link = list(
source = Links$source, # Source nodes for links
target = Links$target, # Target nodes for links
value = Links$value, # Link values
color = Links$color # Link colors (RGBA)
)
)
# Customize the layout of the Sankey diagram
fig <- fig %>% layout(
font = list(
size = 14, # Font size for labels
color = "black", # Font color
weight = "bold" # Font weight
)
)
# Return the generated plot
fig
}
plotSankey(sankeyData$Nodes, sankeyData$Links, add.prop = TRUE)
Carpet
# Create a dataset simulating treatment sequences
carpetData <- data.frame(
T1 = sample( # Initial treatment stage
c("Initial treatment", "Substitution", "No treatment"),
size = 100000, prob = c(0.95, 0.025, 0.025), replace = TRUE
),
T2 = sample( # Second treatment stage
c("Initial treatment", "Substitution", "No treatment"),
size = 100000, prob = c(0.75, 0.125, 0.125), replace = TRUE
),
T3 = sample( # Third treatment stage
c("Initial treatment", "Substitution", "No treatment"),
size = 100000, prob = c(0.65, 0.25, 0.1), replace = TRUE
),
T4 = sample( # Third treatment stage
c("Initial treatment", "Substitution", "No treatment"),
size = 100000, prob = c(0.5, 0.25, 0.25), replace = TRUE
),
T5 = sample( # Third treatment stage
c("Initial treatment", "Substitution", "No treatment"),
size = 100000, prob = c(0.35, 0.4, 0.25), replace = TRUE
)
) %>%
group_by(T1, T2, T3, T4, T5) %>% # Group data by unique sequences
summarise(w = n()) # Summarize the weight (frequency) of each sequence
# Define a sequence object using the TraMineR package
seqCarpet <- seqdef(
data = carpetData[, c("T1", "T2", "T3", "T4", "T5")], # Extract sequence columns
weights = carpetData$w, # Use weights for the sequences
cpal = c("#AAC0AF", "#B28B84", "#1C4073"), # Custom color palette for states
cnames = c("T1", "T2", "T3", "T4", "T5") # Custom names for the sequence stages
)
# Define substitution costs using a constant method
couts <- seqsubm(seqCarpet, method = "CONSTANT", cval = 2)
# Compute optimal matching distances
seq.om <- seqdist(seqCarpet,
method = "OM", # Optimal Matching (OM) method
indel = 1, # Insertion/deletion cost
sm = couts) # Substitution matrix
# Perform hierarchical clustering on the sequence distances
seq.dist <- hclust(as.dist(seq.om), method = "ward.D2")
# Create sequence clusters by cutting the dendrogram into 2 groups
seq_cl <- factor(cutree(seq.dist, 3), labels = c("Class n°1", "Class n°2", "Class n°3"))
# Plot individual sequence plots and group sequence plots side by side
ggarrange(
ggseqiplot(seqCarpet, facet_ncol = 1, no.n = TRUE) + # Individual plot
theme(
panel.spacing = unit(0.1, "lines"), # Adjust panel spacing
axis.text.y = element_text(colour = "white"), # Hide y-axis text
axis.ticks.y = element_line(colour = "white") # Hide y-axis ticks
) +
labs(y = ""), # Remove y-axis label
ggseqiplot( # Plot grouped sequences
seqCarpet,
group = seq_cl, # Group by clusters
facet_ncol = 1,
no.n = TRUE
) +
force_panelsizes(rows = table(seq_cl)) + # Adjust panel sizes by group
theme(
panel.spacing = unit(0.1, "lines"), # Adjust panel spacing
axis.text.y = element_text(colour = "white"), # Hide y-axis text
axis.ticks.y = element_line(colour = "white") # Hide y-axis ticks
) +
labs(y = ""), # Remove y-axis label
ncol = 2, # Arrange plots in 2 columns
nrow = 1, # Arrange plots in 1 row
common.legend = TRUE, # Use a common legend
legend = "bottom" # Place legend at the bottom
)
